# -------------- Setup --------------
# Load packages
library(tidyverse)
library(janitor)
library(here)
library(tidycensus)
library(ggmap)
library(sf)
library(tigris)
library(showtext)
library(sysfonts)
library(tidygeocoder)
library(tmap)
library(tmaptools)
library(waffle)
library(patchwork)
# Set census API key
census_api_key(Sys.getenv("CENSUS_API_KEY"))Final Blog Post
Why voter data?
Since childhood, I’ve been interested in politics– a fault I attribute to my dad. We were your typical liberal Prius-driving NPR listeners and CNN-blaring-in-the-background household. Upon moving to Santa Barbara in 2017 for my undergraduate degree, I’ve volunteered or worked in every election, traveling up and down the county to knock on doors for local candidates with pro-environment and pro-housing values.
Most people assume Santa Barbara is a beautiful oceanside haven for liberal, white, and wealthy– and to some extent, it is. But I’d like to explore some specific trends in voter demographics at a county level:
- How are Democratic and Republican voters spatially distributed throughout the county?
- Is there a partisan difference between the ages of voters in Santa Barbara and Goleta?
- How do voter registrations change over time?
Most visualizations of voter data happen at the county level, rather than within counties. Furthermore, election data from the 2024 general election has not been fully certified yet for most counties in California. I aim to provide a quick overview of the political composition of our county for people interested in local politics.
About the data
The Santa Barbara County voter registration data used for this project was requested from SB County Elections, and is dated for May 9, 2025. It contains voter registration information (voter name, address, contact information, voting precinct, and political party affiliation).
For census data, I use the tidycensus package, with 2023 geometries and population surveys.
Visualizations
How old are Democrats vs. Republicans? For this visualization, I chose stacked bar charts to visualize the age breakdowns within each party, narrowing the focus to the cities of Santa Barbara and Goleta. On average, Democrats are slightly younger than Republicans in both cities. However, older voters represent the greatest share of voters in both political parties. I would have liked to make the bars different colors for each political party, but was having difficulty separating the bars.
When do people register to vote? Voters generally register to vote closer to elections, and we can see that there is a noticeable spike in the line graph of number of registrations each general election year (labeled on the x-axis).
How are voters distributed across the county? I wanted to plot the difference in percentage of Democrats compared to Republicans across the county. I chose a chloropleth map to visualize this, because it is a good way to display spatial relationships. In Santa Barbara, urban centers in South County tend to lean heavily Democratic, while more rural areas are Republican. One challenge with displaying partisanship on a chloropleth map, however, is the visual overrepresentation of sparsely populated but large geographic areas. It appears as though a large portion of the county is red, when in reality very few people live in those areas, and the county as a whole leans solidly Democratic. This is why I’ve included a supplementary waffle plot displaying the proportion of voters by party.
Code for Visualizations
# -------------- Theme & Colors --------------
# Customize font and theme
font_add_google("EB Garamond", "EBGaramond")
showtext_auto()
theme_set(theme_bw())
# Custom colors
party_colors <- c(
"DEM" = "#0f6ba8",
"REP" = "#c83236",
"NPP" = "#694e7a",
"Other" = "gray50",
"Democrat" = "#026CA1",
"Republican" = "#CA2C2C",
"No Party Preference" = "#CECECE",
"American Independent" = "#989898",
"Libertarian" = "#FFD500",
"Unknown" = "gray30",
"Peace and Freedom" = "#7459A7",
"Green" = "#54B643"
)# -------------- Data Import --------------
# Read in voter data
full_file <- read_delim(
here::here("data/Countywide_Voter Registration 030325.TXT"),
delim = "\t"
) %>%
clean_names()
# full_file <- read_delim(
# here::here("EDS-240-DATA-VIZ/rubinstein-eds240-HW4/data/Countywide_Voter Registration 030325.TXT"),
# delim = "\t"
# ) %>%
# clean_names()Visualization 1: Age Composition by Party
# -------------- Visualization 1: Age Composition by Party --------------
# Calculate age distribution by city and party
voter_ages <- full_file %>%
mutate(
age = interval(dob, today()) %/% years(1),
age_group = cut(
age,
breaks = c(18, 25, 35, 50, 65, Inf),
labels = c("18-25", "26-35", "36-50", "51-65", "65+"),
right = FALSE
),
party_code = factor(party_code, levels = c("DEM", "REP"))
) %>%
filter(!is.na(party_code)) %>%
filter(city %in% c("SANTA BARBARA", "GOLETA")) %>%
group_by(city, party_code) %>%
mutate(city_party_total = n()) %>%
group_by(city, party_code, age_group) %>%
summarize(
count = n(),
pct = count / first(city_party_total),
.groups = "drop"
)
# Fix city order
voter_ages$city <- factor(
voter_ages$city,
levels = c("SANTA BARBARA", "GOLETA")
)
# Create age distribution plot with party comparison as proportions and labels
ggplot(
voter_ages,
aes(x = party_code, y = pct, fill = age_group)
) +
geom_col() +
# Add text labels inside the bars
geom_text(
aes(label = scales::percent(pct, accuracy = 1)),
position = position_stack(vjust = 0.5),
color = "white",
fontface = "bold",
size = 3
) +
facet_grid(city ~ .) +
scale_fill_brewer(palette = "Purples", direction = 1) +
scale_y_continuous(
labels = scales::percent,
breaks = seq(0, 1, 0.2)
) +
labs(
title = "Age Distribution of Voters in Santa Barbara and Goleta by Party",
subtitle = "Comparing proportions of Democratic and Republican voters by age group\n",
x = NULL,
y = "Proportion of Registered Voters\n",
fill = "Age Group",
caption = "\nSource: Santa Barbara Voter File (2025)"
) +
theme(
panel.grid.major.y = element_blank(),
text = element_text(family = "EBGaramond"),
axis.text = element_text(family = "EBGaramond"),
plot.title = element_text(
size = 12,
face = "bold",
family = "EBGaramond"
),
strip.text = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(family = "EBGaramond"),
plot.caption = element_text(family = "EBGaramond"),
legend.text = element_text(family = "EBGaramond")
)Visualization 2: Registration Trends
# -------------- Visualization 2: Registration Trends --------------
# Analyze registrations over time
yearly_registration <- full_file %>%
mutate(year = format(registration_date, "%Y")) %>%
filter(year >= "2000") %>%
filter(year <= "2024") %>%
count(year)
# Create time series plot
ggplot(yearly_registration, aes(x = year, y = n)) +
geom_line(group = 1, color = "purple") +
geom_point(color = "purple") +
labs(
title = "Voter Registrations in Santa Barbara County",
subtitle = "Number of new registrations from 2000 - 2024, showing spikes in new registrations during general election years",
x = "Year",
y = "Number of New Registrations"
) +
scale_x_discrete(
breaks = seq(
from = min(yearly_registration$year),
to = max(yearly_registration$year),
by = 4
)
) +
theme(
panel.grid.major.y = element_blank(),
text = element_text(size = 24, family = "EBGaramond"),
axis.text = element_text(family = "EBGaramond"),
plot.title = element_text(face = "bold", family = "EBGaramond"),
plot.subtitle = element_text(family = "EBGaramond"),
plot.caption = element_text(family = "EBGaramond"),
legend.text = element_text(family = "EBGaramond")
)Visualization 3: Spatial Analysis of Voter Distribution
# -------------- Geocoding & Spatial Analysis --------------
# Prepare addresses for geocoding
address_full <- full_file %>%
mutate(street_address_test = paste(address_number, street_name, sep = " "))
# Geocoding function
geocode_addresses <- function() {
census_list <- list()
for (j in 1:25) {
start_index <- ((j - 1) * 10000) + 1
end_index <- j * 10000
census_list[[j]] <- address_full[start_index:end_index, ] %>%
tidygeocoder::geocode(
street = street_address_test,
city = city,
state = state,
method = "census"
)
}
final_census_df <- bind_rows(census_list) %>%
filter(party_code %in% c("DEM", "REP", "NPP")) %>%
drop_na(lat)
# Save for future use
saveRDS(final_census_df, "geocoded_addresses.rds")
return(final_census_df)
}
final_census_df <- geocode_addresses()
# final_census_df <- readRDS("geocoded_addresses.rds")# -------------- Spatial Visualization --------------
# Convert to geocoded addresses to sf object
final_census_sf <- st_as_sf(
final_census_df,
coords = c("long", "lat"),
crs = 4326
)
# Get total population by census tract for Santa Barbara County
sb_pop_2023 <- get_acs(
geography = "tract",
variables = "B01003_001", # Total population variable
state = "CA",
county = "Santa Barbara",
year = 2023,
geometry = TRUE
) %>%
st_transform(crs = 4326)
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|=================================== | 50%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 59%
|
|========================================== | 59%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 85%
|
|======================================================================| 100%
# Join voters to tracts and calculate party percentages
voters_with_tract <- st_join(final_census_sf, sb_pop_2023)
party_by_tract <- voters_with_tract %>%
st_drop_geometry() %>%
group_by(GEOID) %>%
mutate(tract_total = n()) %>%
group_by(GEOID, party_code, tract_total) %>%
summarize(count = n()) %>%
mutate(percentage = count / tract_total * 100) %>%
ungroup()
# Create a dataset with relative Dem-Rep percentages (excluding other parties)
two_party_comparison <- party_by_tract %>%
filter(party_code %in% c("DEM", "REP")) %>%
group_by(GEOID) %>%
mutate(two_party_total = sum(count)) %>%
# Recalculate percentages based only on Dem + Rep voters
mutate(two_party_pct = count / two_party_total * 100) %>%
select(GEOID, party_code, two_party_pct) %>%
pivot_wider(
names_from = party_code,
values_from = two_party_pct
) %>%
mutate(diff = DEM - REP)
# Join the difference data to the geometry
tract_diff_map <- sb_pop_2023 %>%
select(GEOID, geometry) %>%
left_join(two_party_comparison, by = "GEOID") %>%
filter(!is.na(diff))
# Create the divergent map
ggplot() +
geom_sf(data = tract_diff_map, aes(fill = diff)) +
scale_fill_gradient2(
low = party_colors["REP"],
mid = "#FFFFFF",
high = party_colors["DEM"],
midpoint = 0,
name = "Dem-Rep Difference (%)"
) +
labs(
title = "Democratic vs. Republican Voter Distribution by Census Tract",
subtitle = "Percentage difference between parties (excluding other affiliations)"
) +
theme_void()# -------------- Waffle Chart --------------
# Create waffle chart of party distribution
waffle_counts <- full_file %>%
mutate(
party_code = if_else(
party_code %in% c("DEM", "REP", "NPP"),
party_code,
"Other"
)
) %>%
count(party_code) %>%
mutate(
party_code = factor(
party_code,
levels = c("DEM", "REP", "NPP", "Other")
)
)
# Limit total squares for better display
max_squares <- 200
if (sum(waffle_counts$n) > max_squares) {
waffle_counts <- waffle_counts %>%
mutate(n = round(n * max_squares / sum(n)))
}
# Create waffle chart
ggplot(waffle_counts, aes(fill = party_code, values = n)) +
geom_waffle(color = "white", size = 0.5, n_rows = 10) +
scale_fill_manual(values = party_colors) +
coord_fixed() +
theme_void() +
labs(
title = "Party Distribution in Santa Barbara County",
fill = "Party"
)